---
title: "Data Science Salaries Dashboard"
output:
  flexdashboard::flex_dashboard:
    orientation: columns
    source_code: embed
    vertical_layout: scroll
    theme: flatly
    logo: "C:/Users/nishn/Downloads/icons8-money-with-wings-48.png"
---
<style>
.navbar {
background-color:purple;
border-color:white;
}
.navbar-brand {
color:black!important;
}
</style>
```{r setup, include=FALSE}
library(flexdashboard)
library(dplyr)
library(ggplot2)
library(DT)
library(plotly)
library(wordcloud2)
library(sf)
library(spData)
```
Data Table
===================
```{r}
# Load the salary records and swap the coded categorical columns for readable
# labels in a single pass, then show the result as an interactive table.
# Any code that is not listed below is passed through unchanged.
data <- read.csv("C:/Users/nishn/Downloads/ds_salaries.csv") %>%
  mutate(
    employment_type = case_when(
      employment_type == "FT" ~ "Full Time",
      employment_type == "PT" ~ "Part Time",
      employment_type == "CT" ~ "Contract",
      employment_type == "FL" ~ "Freelance",
      TRUE ~ employment_type
    ),
    experience_level = case_when(
      experience_level == "EN" ~ "Entry Level/Junior",
      experience_level == "MI" ~ "Mid-level/Intermediate",
      experience_level == "SE" ~ "Senior-level/Expert",
      experience_level == "EX" ~ "Executive-level/Director",
      TRUE ~ experience_level
    ),
    company_size = case_when(
      company_size == "L" ~ "Large",
      company_size == "M" ~ "Medium",
      company_size == "S" ~ "Small",
      TRUE ~ company_size
    ),
    # Coerce to text first: the labels below are matched against strings and
    # the fallback branch must return a character value.
    remote_ratio = as.character(remote_ratio),
    remote_ratio = case_when(
      remote_ratio == "0" ~ "No Remote Work",
      remote_ratio == "50" ~ "Partially Remote",
      remote_ratio == "100" ~ "Fully Remote",
      TRUE ~ remote_ratio
    )
  )

datatable(data)
```
Designation Analysis
======================
Column {data-width=500}
-----------------------------------------------------------------------
### Job Designation and Experience Level
```{r}
# Names of the 15 job titles that occur most often in the data.
top_job_titles <- data %>%
  group_by(job_title) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  slice_head(n = 15) %>%
  pull(job_title)

# Keep only the records for those top 15 job titles; the salary violin plot
# further down the page reuses this subset.
filtered_data <- data %>%
  filter(job_title %in% top_job_titles)

# Clustered (dodged) bar chart: one bar per experience level within each title.
# geom_bar() counts rows by default, so no explicit stat is needed.
experience_bars <- ggplot(
  filtered_data,
  aes(x = job_title, fill = experience_level)
) +
  geom_bar(position = "dodge") +
  labs(
    title = " Experience Level vs. Top 15 Job Designations",
    x = "Job Title",
    y = "Count",
    fill = "Experience Level"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "bottom",
    plot.title = element_text(hjust = 0.5)
  )

# Render the chart as an interactive plotly widget with every aesthetic in the tooltip.
ggplotly(experience_bars, tooltip = "all")
```
### Job Designation - Word Cloud
```{r,fig.width=15}
# Word cloud input: one row per record with a frequency of 1, summed per
# distinct job title so each title ends up with its occurrence count.
title_rows <- data.frame(
  text = data$job_title,
  freq = 1,
  stringsAsFactors = FALSE
)
title_freq <- aggregate(freq ~ text, data = title_rows, FUN = sum)

wordcloud2(data = title_freq, size = 5)
```
### Job Designation and Salary
```{r}
# Salary distribution (violin) for each of the top 15 job titles.
# The USD-converted column is plotted so that all titles share one comparable
# scale, matching the other salary charts in this dashboard.
# NOTE(review): `salary` sits next to a separate `salary_in_usd` column, so it
# is presumably expressed in each record's own currency - confirm against the
# data dictionary.
violin_plot <- ggplot(
  filtered_data,
  aes(x = job_title, y = salary_in_usd, fill = job_title)
) +
  geom_violin() +
  labs(
    title = "Salary vs. Top 15 Job Titles",
    x = "Job Title",
    y = "Salary",
    fill = "Job Title"
  ) +
  # Scale limits discard observations outside 0 - 1,000,000 before the
  # densities are computed.
  scale_y_continuous(limits = c(0, 1000000)) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "right",
    plot.title = element_text(hjust = 0.5)
  )

ggplotly(violin_plot)
```
Salary Analysis
==================
Column {data-width=500}
-----------------------------------------------------------------------
### Salary Distribution
```{r}
# Overall density of USD salaries; the rug along the axis marks each
# individual observation.
salary_density <- data %>%
  ggplot(aes(x = salary_in_usd)) +
  geom_density(color = "blue", alpha = 0.7) +
  geom_rug(color = "red") +
  scale_x_continuous(labels = scales::number_format(scale = 1)) +
  labs(title = "Salary Distribution", x = "Salary") +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

ggplotly(salary_density)
```
### Salary Distribution based on Company Size
```{r}
# One USD salary density curve per company size.
company_size_density <- data %>%
  ggplot(aes(x = salary_in_usd, color = company_size)) +
  geom_density(alpha = 0.7) +
  scale_x_continuous(labels = scales::number_format(scale = 1)) +
  labs(
    title = "Salary Distribution based on Company Size",
    x = "Salary",
    color = "Company Size"
  ) +
  theme_minimal()

# Convert to plotly, set the widget's title and axis labels, and display it.
ggplotly(company_size_density) %>%
  layout(
    title = "Salary Distribution based on Company Size",
    xaxis = list(title = "Salary"),
    yaxis = list(title = "Density")
  )
```
Column {data-width=500}
-----------------------------------------------------------------------
### Median Salary by Experience Level and Remote Ratio
```{r}
# Median USD salary for every experience level / remote ratio combination.
# `salary_in_usd` is used (rather than `salary`) so the medians are computed
# on one common scale, like the other salary summaries in this dashboard.
# The grouping is dropped so plotly receives a plain, ungrouped table.
grouped_data1 <- data %>%
  group_by(experience_level, remote_ratio) %>%
  summarise(median_salary = median(salary_in_usd), .groups = "drop")

# Category orders are given explicitly so the axes follow seniority and
# remoteness instead of alphabetical order.
experience_order <- c(
  "Entry Level/Junior",
  "Mid-level/Intermediate",
  "Senior-level/Expert",
  "Executive-level/Director"
)
remote_order <- c("Fully Remote", "Partially Remote", "No Remote Work")

plot_ly(
  data = grouped_data1,
  x = ~experience_level,
  y = ~remote_ratio,
  z = ~median_salary,
  type = "heatmap",
  # A heatmap's colour scale is a colorbar, not a legend, so the title goes here.
  colorbar = list(title = "Median Salary")
) %>%
  layout(
    title = "Heatmap of Median Salary by Experience Level and Remote Ratio",
    xaxis = list(
      title = "Experience Level",
      categoryorder = "array",
      categoryarray = experience_order
    ),
    yaxis = list(
      title = "Remote Ratio",
      categoryorder = "array",
      categoryarray = remote_order
    )
  )
```
### Salary Distribution based on Experience
```{r}
# One USD salary density curve per experience level.
experience_density <- data %>%
  ggplot(aes(x = salary_in_usd, color = factor(experience_level))) +
  geom_density(alpha = 0.8) +
  scale_x_continuous(labels = scales::number_format(scale = 1)) +
  labs(
    title = "Salary Distribution based on Experience",
    x = "Salary",
    color = "Experience"
  ) +
  theme_minimal()

# Convert to plotly, set the widget's title and axis labels, and display it.
ggplotly(experience_density) %>%
  layout(
    title = "Salary Distribution based on Experience",
    xaxis = list(title = "Salary"),
    yaxis = list(title = "Density")
  )
```
Max & Min Salary Analysis
====================
Column {data-width=500}
-----------------------------------------------------------------------
### Median Salary for Top 10 Highest Paid Job Titles
```{r}
# The ten job titles with the highest median USD salary. The bubble chart for
# the highest paid titles joins against this table.
top_salary <- data %>%
  group_by(job_title) %>%
  summarise(med_salary = median(salary_in_usd)) %>%
  arrange(desc(med_salary)) %>%
  head(10)

# Bars ordered from highest to lowest median; the `text` aesthetic carries the
# custom hover label shown by plotly.
top_paid_chart <- ggplot(
  top_salary,
  aes(
    x = reorder(job_title, -med_salary),
    y = med_salary,
    fill = job_title,
    text = paste("Job Title: ", job_title, "<br>Median Salary: $", round(med_salary, 2))
  )
) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = scales::number_format(scale = 1)) +
  labs(
    title = "Median Salary for Top 10 Highest Paid Job Titles",
    x = "Job Title",
    y = "Salary"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  )

ggplotly(top_paid_chart, tooltip = "text")
```
### Bubble Chart of Job Title v/s Experience Level by Salary (Highest Paid)
```{r}
# Median USD salary per (job title, experience level), restricted to the
# highest paid titles by joining against `top_salary`.
med_salaries_high <- data %>%
  inner_join(top_salary, by = "job_title") %>%
  group_by(job_title, experience_level) %>%
  summarise(med_salary = median(salary_in_usd))

# Make black-and-white the session default theme; plots created afterwards
# without an explicit theme pick it up as well.
theme_set(theme_bw())

# Bubble chart: point size encodes the median salary, colour the experience
# level. Points are jittered, so their exact positions vary between renders.
high_paid_bubbles <- ggplot(
  med_salaries_high,
  aes(
    x = job_title,
    y = experience_level,
    text = paste("Job Title:", job_title, "<br>Experience Level:", experience_level, "<br>Salary:", round(med_salary, 2), "$")
  )
) +
  geom_jitter(aes(colour = experience_level, size = med_salary)) +
  labs(
    title = "Job Title v/s Experience Level by Salary (Highest Paid)",
    x = "Job Title",
    y = "Experience Level"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

ggplotly(high_paid_bubbles, tooltip = "text")
```
Column {data-width=500}
-----------------------------------------------------------------------
### Median Salary for Top 10 Lowest Paid Job Titles
```{r}
# The ten job titles with the lowest median USD salary. The bubble chart for
# the lowest paid titles joins against this table.
bottom_salary <- data %>%
  group_by(job_title) %>%
  summarise(med_salary = median(salary_in_usd)) %>%
  arrange(med_salary) %>%
  head(10)

# Bars ordered from lowest to highest median; the `text` aesthetic carries the
# custom hover label shown by plotly.
lowest_paid_chart <- ggplot(
  bottom_salary,
  aes(
    x = reorder(job_title, med_salary),
    y = med_salary,
    fill = job_title,
    text = paste("Job Title: ", job_title, "<br>Median Salary: $", round(med_salary, 2))
  )
) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = scales::number_format(scale = 1)) +
  labs(
    title = "Median Salary for Top 10 Lowest Paid Job Titles",
    x = "Job Title",
    y = "Salary"
  ) +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none",
    plot.title = element_text(hjust = 0.5)
  )

ggplotly(lowest_paid_chart, tooltip = "text")
```
### Bubble Chart of Job Title v/s Experience Level by Salary (Lowest Paid)
```{r}
# Median USD salary per (job title, experience level), restricted to the
# lowest paid titles by joining against `bottom_salary`.
med_salaries_low <- data %>%
  inner_join(bottom_salary, by = "job_title") %>%
  group_by(job_title, experience_level) %>%
  summarise(med_salary = median(salary_in_usd))

# Make black-and-white the session default theme; plots created afterwards
# without an explicit theme pick it up as well.
theme_set(theme_bw())

# Bubble chart: point size encodes the median salary, colour the experience
# level. Points are jittered, so their exact positions vary between renders.
low_paid_bubbles <- ggplot(
  med_salaries_low,
  aes(
    x = job_title,
    y = experience_level,
    text = paste("Job Title:", job_title, "<br>Experience Level:", experience_level, "<br>Salary:", round(med_salary, 2), "$")
  )
) +
  geom_jitter(aes(colour = experience_level, size = med_salary)) +
  labs(
    title = "Job Title v/s Experience Level by Salary (Lowest Paid)",
    x = "Job Title",
    y = "Experience Level"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.title = element_text(hjust = 0.5),
    legend.position = "none"
  )

ggplotly(low_paid_bubbles, tooltip = "text")
```
Work Year Analysis
==========================
Column {data-width=500}
-----------------------------------------------------------------------
### Scatter Plot of Median Salary by Year
```{r}
# Median USD salary for each work year and experience level.
med_salaries <- data %>%
  group_by(work_year, experience_level) %>%
  summarise(med_salary = median(salary_in_usd))

# Points joined by one line per experience level; `group` keeps each level's
# line separate and `text` supplies the plotly hover label.
scatter_plot <- ggplot(
  med_salaries,
  aes(
    x = work_year,
    y = med_salary,
    color = experience_level,
    group = experience_level,
    text = paste(
      "Year:", work_year, "<br>Salary:", round(med_salary, 2), "$",
      "<br>Experience Level:", experience_level
    )
  )
) +
  geom_point(size = 3) +
  geom_line(aes(color = experience_level), size = 1) +
  labs(
    title = "Scatter Plot of Median Salary by Year",
    x = "Year",
    y = "Salary"
  ) +
  theme(plot.title = element_text(hjust = 0.5))

ggplotly(scatter_plot, tooltip = "text")
```
### Remote Ratio by Work Year
```{r}
# Share (in percent) of each remote-work category within every work year:
# count the rows per (year, category), then divide by the year's total.
percentage_data <- data %>%
  group_by(work_year, remote_ratio) %>%
  summarise(count = n()) %>%
  group_by(work_year) %>%
  mutate(percentage = count / sum(count) * 100)

# Stacked bar chart of remote ratio by work year. The values are already
# percentages, hence `scale = 1` in the axis label formatter.
remote_share_chart <- ggplot(
  percentage_data,
  aes(x = work_year, y = percentage, fill = remote_ratio)
) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = scales::percent_format(scale = 1)) +
  labs(
    title = "Remote Ratio by Work Year",
    x = "Work Year",
    y = "Percentage",
    fill = "Remote Ratio"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Interactive version; the hover shows year, category and percentage.
ggplotly(
  remote_share_chart,
  tooltip = c("work_year", "remote_ratio", "percentage")
)
```
Column {data-width=500}
-----------------------------------------------------------------------
### Percentage Distribution of Work Years
```{r}
# Share (in percent) of all records that fall in each work year.
# (A grouped copy of the data that was built here but never used has been
# removed.)
percentage_data <- data %>%
  group_by(work_year) %>%
  summarise(count = n()) %>%
  mutate(percentage = count / sum(count) * 100)

# Pie chart with one slice per work year; hovering shows the year and its share.
plot_ly(
  percentage_data,
  labels = ~work_year,
  values = ~percentage,
  type = "pie",
  hoverinfo = "label+percent"
) %>%
  layout(title = "Percentage Distribution of Work Years")
```
### Company Location by Work Year
```{r}
# The 15 company locations with the most records.
top_locations <- data %>%
  group_by(company_location) %>%
  summarise(location_count = n()) %>%
  arrange(desc(location_count)) %>%
  head(15)

# ISO-2 code -> country name lookup taken from the `world` dataset.
# The geometry is dropped *before* joining so that no spatial column is carried
# into the result, and rows lacking an ISO code are removed so that a missing
# code in the data can never match them.
country_names <- world %>%
  st_drop_geometry() %>%
  filter(!is.na(iso_a2)) %>%
  select(iso_a2, name_long)

# Records for the top locations, with the location code replaced by the
# country name where one is known (the code itself is kept otherwise).
ij <- inner_join(top_locations, data, by = "company_location") %>%
  left_join(country_names, by = c("company_location" = "iso_a2")) %>%
  mutate(company_location = coalesce(name_long, company_location)) %>%
  select(-name_long)

# Share (in percent) of each company location within every work year.
percentage_data1 <- ij %>%
  group_by(work_year, company_location) %>%
  summarise(count = n()) %>%
  group_by(work_year) %>%
  mutate(percentage = count / sum(count) * 100)

# Horizontal stacked bar chart of company location by work year. The values
# are already percentages, hence `scale = 1` in the axis label formatter.
gg <- ggplot(
  percentage_data1,
  aes(x = work_year, y = percentage, fill = company_location)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    title = "Company Location by Work Year",
    x = "Work Year",
    y = "Percentage",
    fill = "Company Location"
  ) +
  scale_y_continuous(labels = scales::percent_format(scale = 1)) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Convert to an interactive plotly widget; the hover shows year, location and
# percentage.
plotly_chart <- ggplotly(
  gg,
  tooltip = c("work_year", "company_location", "percentage")
)
plotly_chart
```
Location Analysis
=====================
Column {data-width=500}
----------------------------
### Median Salary by Company Location
```{r map, echo=FALSE, results='asis'}
world <- spData::world

# Median USD salary per company location, highest first.
median_salary_by_country <- data %>%
  group_by(company_location) %>%
  summarise(med_salary = median(salary_in_usd)) %>%
  arrange(desc(med_salary))

# ISO-2 code -> country name lookup. Geometry is dropped because only the
# names are needed for a bar chart, and rows lacking an ISO code are removed
# so that a missing code in the data can never match them.
country_names <- world %>%
  st_drop_geometry() %>%
  filter(!is.na(iso_a2)) %>%
  select(iso_a2, name_long)

# One row per company location with its display name. Locations that have no
# match in `world` keep their ISO code as the label instead of becoming NA,
# which would otherwise leave them without a usable bar label.
jmd <- median_salary_by_country %>%
  left_join(country_names, by = c("company_location" = "iso_a2")) %>%
  mutate(name_long = coalesce(name_long, company_location)) %>%
  select(iso_a2 = company_location, name_long, med_salary)

# Bars ordered from highest to lowest median salary; each row gets its own
# colour from the rainbow palette.
plot_ly(
  data = jmd,
  x = ~reorder(name_long, -med_salary),
  y = ~med_salary,
  type = "bar",
  marker = list(color = rainbow(nrow(jmd)))
) %>%
  layout(
    title = "Median Salary by Country",
    xaxis = list(title = "Country"),
    yaxis = list(title = "Median Salary")
  )
```
### Top Employee Residence countries and their company location
```{r}
# The ten most common employee residence countries.
emp_data <- data %>%
  group_by(employee_residence) %>%
  summarise(count = n()) %>%
  arrange(desc(count)) %>%
  head(10)

# Records whose employee lives in one of those ten countries. A plain inner
# merge keeps exactly the matching rows, so no outer join + NA filter is needed.
data_top_emp <- merge(data, emp_data, by = "employee_residence")

# Number of employees per (company location, residence) pair, counting only
# people who work for a company located in a different country.
# The grouping is dropped explicitly: otherwise the result stays grouped by
# `company_location`, and that column could not be overwritten below.
dataind <- data_top_emp %>%
  filter(company_location != employee_residence) %>%
  group_by(company_location, employee_residence) %>%
  summarise(count = n(), .groups = "drop")

# ISO-2 code -> country name lookup. Geometry is dropped up front (only the
# names are needed), and rows lacking an ISO code are removed so that a
# missing code in the data can never match them.
country_names <- world %>%
  st_drop_geometry() %>%
  filter(!is.na(iso_a2)) %>%
  select(iso_a2, name_long)

# Replace both country codes with full names where known; the code itself is
# kept as a fallback.
dataind <- dataind %>%
  left_join(country_names, by = c("company_location" = "iso_a2")) %>%
  mutate(company_location = coalesce(name_long, company_location)) %>%
  select(-name_long) %>%
  left_join(country_names, by = c("employee_residence" = "iso_a2")) %>%
  mutate(employee_residence = coalesce(name_long, employee_residence)) %>%
  select(-name_long)

# Stacked bars: one bar per residence country, split by company location.
# `fill` refers to the column directly instead of `dataind$...` so the
# aesthetic is evaluated within the plot's own data.
p <- ggplot(
  dataind,
  aes(
    x = reorder(employee_residence, -count),
    y = count,
    fill = company_location,
    text = paste("Company Location:", company_location, "<br>Count:", count)
  )
) +
  geom_bar(stat = "identity") +
  labs(
    title = "Stacked Bar Chart of Counts by Employee Residence and Company Location",
    x = "Employee Residence",
    y = "Count",
    fill = "Company Location"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Convert to an interactive plotly widget and display it.
p <- ggplotly(p, tooltip = "text")
p
```